# you may need to install the packages
# install.packages("stringr")
# install.packages("plotly")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

# Mobile Food Schedule Data ----

# Load the mobile food schedule. Text columns are kept as character vectors
# (not factors) because they are processed with string functions further down.
dat <- read.csv(
  file = "mobile-food-sf.csv",
  stringsAsFactors = FALSE
)

# Plots with Plotly ----

# Count how many schedule rows fall on each day of the week.
# table() returns a named count vector, with the day names as the names.
day_freqs <- table(dat$DayOfWeekStr)

# Base-graphics bar chart of the counts: no bar borders, and las = 3 draws
# the axis labels perpendicular to the axis so the day names do not overlap.
barplot(day_freqs, border = NA, las = 3)

# Same counts as a plotly bar chart: the table's names supply the
# x categories and the counts supply the bar heights.
plot_ly(x = names(day_freqs), 
        y = day_freqs,
        type = 'bar')
# Day frequencies as a two-column table (DayOfWeekStr, count),
# ordered from the busiest day to the quietest.
day_counts <- dat %>%
  select(DayOfWeekStr) %>%
  count(DayOfWeekStr, sort = TRUE) %>%
  rename(count = n)

day_counts
## # A tibble: 7 x 2
##   DayOfWeekStr count
##   <chr>        <int>
## 1 Friday        1105
## 2 Wednesday     1095
## 3 Thursday      1090
## 4 Tuesday       1081
## 5 Monday        1080
## 6 Saturday       533
## 7 Sunday         263
# Bar chart straight from the summary table; the formulas (~) refer to
# columns of day_counts.
plot_ly(day_counts, 
        x = ~DayOfWeekStr, 
        y = ~count,
        type = 'bar')
# Same chart, but reorder() turns the day names into a factor whose levels
# are ordered by count, so the bars are intended to appear sorted by
# frequency instead of in the default category order.
plot_ly(day_counts, 
        x = ~reorder(DayOfWeekStr, count), 
        y = ~count,
        type = 'bar')

# Changing Times ----

# toy string: a clock time written as hour followed by AM/PM
time1 <- "10AM"

# hour: the first two characters
str_sub(time1, 1, 2)
## [1] "10"
# period: the last two characters
str_sub(time1, 3, 4)
## [1] "AM"
times <- c("12PM", "10AM", "9AM", "8AM", "2PM")

# subset time: negative positions count from the end of the string, so
# end = -3 drops the two-letter period whether the hour has 1 or 2 digits
str_sub(times, start = 1, end = -3)
## [1] "12" "10" "9"  "8"  "2"
# subset period: the last two characters
str_sub(times, start = -2, end = -1)
## [1] "PM" "AM" "AM" "AM" "PM"
hours <- times %>%
  str_sub(start = 1, end = -3) %>%
  as.numeric()
hours
## [1] 12 10  9  8  2
periods <- str_sub(times, start = -2, end = -1)
periods
## [1] "PM" "AM" "AM" "AM" "PM"
freq <- table(periods)
freq
## periods
## AM PM 
##  3  2
plot_ly(
  x = names(freq),
  y = freq,
  type = "bar"
)
times <- c("12PM", "10AM", "9AM", "8AM", "12AM")

#' Convert 12-hour clock strings to 24-hour numeric hours
#'
#' Each element of `x` is expected to look like "9AM" or "12PM": an hour
#' followed by a two-letter period. The conversion rules are:
#'   * "12PM"            -> 12
#'   * any other PM hour -> hour + 12
#'   * "12AM"            -> 24 (midnight is coded as 24, not 0)
#'   * any other hour    -> hour unchanged (every period other than "PM"
#'                          is treated like "AM")
#'
#' The whole vector is processed at once. Empty input yields `numeric(0)`
#' and missing elements yield `NA` instead of raising an error.
#'
#' @param x Character vector of times such as "10AM".
#' @return Numeric vector of hours, the same length as `x`.
start24 <- function(x) {
  # Nothing to convert; also avoids the 1:length(x) trap on empty input.
  if (length(x) == 0) {
    return(numeric(0))
  }

  x <- as.character(x)
  n_chars <- nchar(x)

  # Everything before the last two characters is the hour ...
  hours <- as.numeric(substr(x, 1, n_chars - 2))
  # ... and the last two characters are the period ("AM" / "PM").
  period <- substr(x, n_chars - 1, n_chars)
  is_pm <- period == "PM"

  y <- ifelse(
    hours == 12,
    ifelse(is_pm, 12, 24),
    ifelse(is_pm, hours + 12, hours)
  )

  # ifelse() returns a logical vector when every element is NA; keep the
  # result numeric in all cases.
  as.numeric(y)
}
  

start24(times)
## [1] 12 10  9  8 24
# Add the start and end hours on a 24-hour scale, plus the number of hours
# between them. mutate() evaluates its arguments in order, so `duration`
# can use the `start` and `end` columns created just before it.
dat <- dat %>%
  mutate(
    start = start24(starttime),
    end = start24(endtime),
    duration = end - start
  )

# Latitude and Longitude Coordinates ----

loc1 <- "(37.7651967350509,-122.416451692902)"

# "remove" opening parenthesis (escaped because "(" is special in a regex)
str_replace(loc1, "\\(", "")
## [1] "37.7651967350509,-122.416451692902)"
# "remove" closing parenthesis
str_replace(loc1, "\\)", "")
## [1] "(37.7651967350509,-122.416451692902"
loc_all <- dat$Location

# strip both kinds of parenthesis from every location in one pass
loc2 <- str_replace_all(loc_all, "\\(|\\)", "")

# split every "lat,lon" string at the comma: one character vector per row
lat_lon <- str_split(loc2, ",")

# first piece is the latitude, second piece is the longitude
lat <- lapply(lat_lon, `[`, 1)
lon <- lapply(lat_lon, `[`, 2)

# flatten the lists and convert the text to numbers
lat1 <- as.numeric(unlist(lat))
lon <- as.numeric(unlist(lon))

# attach the numeric coordinates to the data
dat <- dat %>%
  mutate(lat = lat1, lon = lon)

# Plotting locations on a map ----

# Base-graphics scatter plot of the coordinates: longitude on x, latitude
# on y. pch = 19 is a solid dot; the 8-digit hex colour is grey with an
# alpha channel ("44"), so overlapping points look darker.
plot(dat$lon, dat$lat, pch = 19, col = "#77777744")

# Same scatter with plotly, passing the vectors directly. No trace type or
# mode is given, so plotly picks them itself (see the messages below).
plot_ly(x = dat$lon, y = dat$lat)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: Ignoring 40 observations
# Explicit version: columns are referenced with formulas, and the trace type
# and mode are stated, which avoids the informational messages above.
plot_ly(data = dat, x = ~lon, y = ~lat, type = 'scatter', mode = 'markers')
## Warning: Ignoring 40 observations

# Maps with "RgoogleMaps" ----

library(RgoogleMaps)
# coordinates for center of the map: (mean latitude, mean longitude),
# ignoring rows whose coordinates are missing
center <- c(
  mean(dat$lat, na.rm = TRUE),
  mean(dat$lon, na.rm = TRUE)
)

# zoom value computed by MaxZoom() from the observed latitude and
# longitude ranges
zoom <- min(
  MaxZoom(
    latrange = range(dat$lat, na.rm = TRUE),
    lonrange = range(dat$lon, na.rm = TRUE)
  )
)

# san francisco map: fetch the static map image, then draw the locations
map1 <- GetMap(center = center, zoom = zoom, destfile = "san-francisco.png")
PlotOnStaticMap(map1, lat = dat$lat, lon = dat$lon, col = "#ed4964", pch = 20)

# Maps with "ggmap" ----

library(ggmap)
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
# let's get rid of rows with missing values
# (na.omit() drops a row if ANY of its columns is NA, not only lat/lon)
dat <- na.omit(dat)

# ggmap typically asks you for a zoom level,
# but we can try using ggmap's make_bbox function,
# which builds a bounding box around the points (f adds a margin):
sbbox <- make_bbox(lon = dat$lon, lat = dat$lat, f = 0.1)
sbbox
##       left     bottom      right        top 
## -122.48867   37.69985 -122.36281   37.81595
# get a 'terrain' map
sf_map <- get_map(
  location = sbbox,
  source = "google",
  maptype = "terrain"
)
## Warning: bounding box given to google - spatial extent only approximate.
## converting bounding box to center/zoom specification. (experimental)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.757897,-122.425744&zoom=13&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
# draw the map and overlay the locations as small, semi-transparent red dots
ggmap(sf_map) +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = dat,
    colour = "red",
    alpha = 0.2,
    size = 1
  )
## Warning: Removed 98 rows containing missing values (geom_point).

# Let's look for specific types of food ----

# Free-text description of what each vendor sells, lower-cased so the search
# below matches every capitalisation ("Burritos", "BURRITOS", "burritos", ...).
foods <- str_to_lower(dat$optionaltext)

# TRUE for rows whose description mentions burritos. str_detect() returns NA
# for missing text; treat those as "no match" so that dat[d, ] cannot
# produce all-NA rows.
d <- str_detect(foods, "burritos")
d <- !is.na(d) & d
newdat <- dat[d, ]

# Plot only the burrito vendors on the static map fetched earlier (map1).
PlotOnStaticMap(map1, newdat$lat, newdat$lon, col = "#ed4964", pch = 20)